package com.zyf.mapreduce.etl;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Map-side ETL filter for web access log lines.
 *
 * <p>Each input line is split on single space characters. A line that yields more than
 * 11 fields is written to the output unchanged, as the key, paired with a
 * {@link NullWritable} value. Every other line is dropped without being written.
 *
 * @author Malegod_xiaofei
 * @create 2024-07-27-11:42
 */
public class WebLogMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /** Separator used to split a log line into fields. */
    private static final String FIELD_DELIMITER = " ";

    /** A line is kept only when it has strictly more fields than this. */
    private static final int FIELD_COUNT_THRESHOLD = 11;

    /**
     * Filters a single log line: forwards it when it passes validation, otherwise skips it.
     *
     * @param key     input key supplied by the framework; not used by this mapper
     * @param value   the log line to validate
     * @param context job context used to emit accepted lines
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // 1. Read the current line.
        String line = value.toString();

        // 2. ETL: skip lines that do not pass validation.
        if (!isValidLog(line)) {
            return;
        }

        // 3. Emit the original Text untouched; the output carries no value payload.
        context.write(value, NullWritable.get());
    }

    /**
     * Checks whether a log line has enough space-separated fields to be kept.
     *
     * <p>Example of a line that is rejected, because it splits into only 10 fields:
     * <pre>
     * 222.68.172.190 - - [18/Sep/2013:06:50:08 +0000] "-" 400 0 "-" "-"
     * </pre>
     *
     * @param line the raw log line
     * @return {@code true} if splitting on a single space yields more than 11 fields
     */
    private static boolean isValidLog(String line) {
        return line.split(FIELD_DELIMITER).length > FIELD_COUNT_THRESHOLD;
    }
}